# "Efficiency and water use: Dynamic effects of irrigation technology adoption"
# by Micah Cameron-Harp and Nathan Hendricks
# 
# Code written by Micah Cameron-Harp
# May 16th, 2024
# 
# This r script performs a bacon decomposition for both transitions
#   in irrigation technology to illustrate the potential for bias
#   in TWFE estimates. It produces the results displayed in Table B.1 and
#   Figure B.4.
pacman::p_load(
  did,
  tidyverse,
  dplyr,
  bacondecomp,
  ggplot2,
  data.table,
  cowplot
)

# Parse arguments submitted by STATA
args = commandArgs(trailingOnly = "TRUE")
arg1 <- args[1]

#Define directories and set working directory
# NOTE - The root directory will be passed here from the main_text_results.do file 
dr_root <- paste0(arg1)
dr_data <- paste0(dr_root, "/data")
dr_output = paste0(dr_root, "/outputs")
dr_output_main = paste0(dr_root, "/outputs/main_text")
dr_output_app = paste0(dr_root, "/outputs/appendices")
dr_output_log = paste0(dr_root, "/outputs/logs")
dr_temp = paste0(dr_root, "/data/intermediate")
setwd(dr_temp)

########################################
#---------- Flood to Center Pivot or LEPA -----#
wrg_flood_cplepa <- read.csv(file = file.path(dr_temp, "flood_cporlepa_prepped_bacon.csv"))

#Create count of observations 
wrg_flood_cplepa <- wrg_flood_cplepa %>%
  group_by(WR_GROUP) %>%
  mutate(count = n())

#Do bacon decomp after creating a balanced panel
wrg_flood_cplepa_balanced <- wrg_flood_cplepa %>% filter(count==29)

#Example code we need to run to grab column titles
df_bacon <- bacon(af_used ~ treat_status,
                  data = wrg_flood_cplepa_balanced,
                  id_var = "WR_GROUP",
                  time_var = "wua_year")
type_list <- c(unique(df_bacon$type))

#Define function to store output printed in r
format_bacon_output <- function(x, df) {
  out <- df %>% dplyr::filter(type==x)
  out_type <- x
  out_weight <- sum(out$weight)
  out_avgest <- weighted.mean(out$estimate, out$weight)
  out_df <- data.frame(type = out_type,
                       weight = out_weight,
                       avg_est = out_avgest)
}

#Do it in a loop for all dependant variables
#List the dependent variables
effect_order <- c("af_used", "acres_irr", "depth_applied")

for (i in effect_order) {
  wrg_flood_cplepa_bacon <- wrg_flood_cplepa_balanced
  if (i == "depth_applied") {
    wrg_flood_cplepa_bacon <- wrg_flood_cplepa_bacon %>% dplyr::filter(acres_irr!=0)
    wrg_flood_cplepa_bacon <- wrg_flood_cplepa_bacon %>%
      group_by(WR_GROUP) %>%
      mutate(count = n())
    wrg_flood_cplepa_bacon <- wrg_flood_cplepa_bacon %>% filter(count==29)
  }
  bac_form <- as.formula(paste0(i, " ~ treat_status"))
  ret_bacon <- bacon(bac_form,
                     data = wrg_flood_cplepa_bacon,
                     id_var = "WR_GROUP",
                     time_var = "wua_year")
  output_df <- ret_bacon
  write.csv(output_df, file = paste0(dr_temp, "/bacon_flood_cplepa_", i, ".csv"), row.names = FALSE)
  
  #Now output the aggregated weights and estimates by type   
  output_list <- lapply(type_list, format_bacon_output, df = output_df) 
  simple_out_df <- rbindlist(output_list)
  simple_out_df <- cbind(simple_out_df, dep_var = paste0(i))
  saveRDS(simple_out_df, file = paste0(dr_temp, "/bacon_flood_cplepa_sum_", i, ".rds"))
}
#Remove unnecessary objects
rm(wrg_flood_cplepa, wrg_flood_cplepa_balanced)
gc()

########################################
#---------- Center Pivot to LEPA -----#
rm(df_bacon, output_df, output_list, ret_bacon)
wrg_cp_lepa <- read.csv(file = file.path(dr_temp, "cp_lepa_prepped.csv"))
wrg_cp_lepa <- wrg_cp_lepa %>%
  group_by(WR_GROUP) %>%
  mutate(count = n())
#Do bacon decomp after creating a balanced panel
wrg_cp_lepa_balanced <- wrg_cp_lepa %>% filter(count==29)

#Need to add "Treated vs. Untreated" to the type_list object
type_list <- c("Earlier vs Later Treated", "Later vs Earlier Treated", "Treated vs Untreated")

#Do it in a loop for all dependant variables
#List the dependent variables
effect_order <- c("af_used", "acres_irr", "depth_applied")

for (i in effect_order) {
  wrg_cp_lepa_bacon <- wrg_cp_lepa_balanced
  if (i == "depth_applied") {
    wrg_cp_lepa_bacon <- wrg_cp_lepa_bacon %>% dplyr::filter(acres_irr!=0)
    wrg_cp_lepa_bacon <- wrg_cp_lepa_bacon %>%
      group_by(WR_GROUP) %>%
      mutate(count = n())
    wrg_cp_lepa_bacon <- wrg_cp_lepa_bacon %>% filter(count==29)
  }
  #Without covariates
  bac_form <- as.formula(paste0(i, " ~ treat_status"))
  ret_bacon <- bacon(bac_form,
                     data = wrg_cp_lepa_bacon,
                     id_var = "WR_GROUP",
                     time_var = "wua_year")
  output_df <- ret_bacon
  write.csv(output_df, file = paste0(dr_temp, "/bacon_cp_lepa_", i, ".csv"), row.names = FALSE)
  
  #Now output the aggregated weights and estimates by type   
  output_list <- lapply(type_list, format_bacon_output, df = output_df) 
  simple_out_df <- rbindlist(output_list)
  simple_out_df <- cbind(simple_out_df, dep_var = paste0(i))
  saveRDS(simple_out_df, file = paste0(dr_temp, "/bacon_cp_lepa_sum_", i, ".rds"))
}
#Remove unnecessary objects
rm(wrg_cp_lepa, wrg_cp_lepa_balanced)
gc()

########    
### FINAL STEP
#Combine all the summarized bacon decomp results into one table
tec_list <- c("flood_cplepa", "cp_lepa")
effect_order <- c("af_used", "acres_irr", "depth_applied")

all_tech_all_depvar_bacon_sum <- lapply(tec_list, function(x) {
  file_list <- list.files(path = dr_temp,
                          pattern = paste0("bacon_", x, "_sum_"), 
                          full.names = TRUE)
  out <- lapply(file_list, readRDS)
  out <- rbindlist(out)
  out_df <- cbind(out, tech = c(paste0(x)))
})

#Now combine the three dataframes
final_all_tech_simple_bacon <- rbindlist(all_tech_all_depvar_bacon_sum[1:2])

#Save
saveRDS(final_all_tech_simple_bacon, file = file.path(dr_output_app, "/tableB1.rds"))

#Make one version to easily copy paste into Word table in appendix
ord1 <- tec_list
ord2 <- c("af_used", "acres_irr", "depth_applied")
for_word <- final_all_tech_simple_bacon %>% 
  dplyr::mutate(f1 = factor(tech, levels = ord1),
                f2 = factor(dep_var, levels = ord2))
final_for_word <- arrange(for_word, f1, f2)
final_for_word <- final_for_word %>% dplyr::mutate(weight = round(weight, digits = 2),
                                                   avg_est = round(avg_est, digits = 2))
write.csv(final_for_word, file = file.path(dr_output_app, "/tableB1.csv"), row.names = FALSE )

################################################################################
#Load in the individual bacon decompositions and combine into 
#one file to plot the results for each dependent variable
#Load in the first one 
setwd(dr_temp)
first_df <- read.csv("bacon_flood_cplepa_af_used.csv")
first_df$tech <- "flood_cplepa"
first_df$dep_var <- "af_used"
final_df <- first_df[0,]

#Combine all the individual 2 by 2 bacon decomp results into one table
tec_list <- c("flood_cplepa_", "cp_lepa_")
effect_order <- c("af_used", "acres_irr", "depth_applied")

for (tec in tec_list) {
  for (depvar in effect_order) {
    cur_df <- read.csv(paste0("bacon_", tec, depvar, ".csv"))
    cur_df$dep_var <- depvar
    if (tec==tec_list[1]) {cur_df$tech <- "flood_cplepa"}
    if (tec==tec_list[2]) {cur_df$tech <- "cp_lepa"}
    final_df <- rbind(final_df, cur_df)
  }
}

#Save
saveRDS(final_df, file = file.path(dr_temp, "/two_by_two_bacon_results.rds"))


################# Figure B.4 ###################################################
#Make dfs specific to each transition
flood_df <- final_df %>% dplyr::filter(tech=="flood_cplepa")
cp_df <- final_df %>% dplyr::filter(tech=="cp_lepa")

#Make list of dependant variables
effect_order <- c("af_used", "acres_irr", "depth_applied")

#Create a list containing the 3 flood to cplepa graphs
flood_graphs <- lapply(effect_order, function(x) {
  df <- flood_df %>% dplyr::filter(dep_var==paste0(x))
  out_graph <- ggplot(df) + 
    geom_point(aes(x=weight, y=estimate, color = type, shape = type, alpha = type), size = 0.5) +
    labs(color="Comparison") +
    scale_color_manual(values = c("Earlier vs Later Treated" = "dodgerblue2", "Later vs Earlier Treated" = "orange", "Treated vs Untreated" = "darkorchid4")) +
    scale_shape_manual(values = c(19, 15, 17)) + 
    scale_alpha_manual(values = c(0.4, 0.7, 1)) +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          legend.key=element_rect(fil="white"),
          legend.position="bottom") +
    theme(axis.line = element_line(colour = "black")) +
    guides(color = guide_legend(title.position = "top",  title.hjust = .5, override.aes = list(size=3, shape = c(19, 15, 17))),
           shape = "none",
           alpha = "none") +
    theme(legend.box="vertical") +
    theme(text=element_text(size=9,  family="serif")) +
    xlab("Weight") +
    ylab("Estimated effect") +
    theme(legend.margin=margin(0,0,0,0),
          legend.box.margin=margin(0, 0, 0, 0),
          legend.spacing.y = unit(0.05, 'cm'))
  out_graph
})

#Create a list containing the cp to lepa graphs
cp_graphs <- lapply(effect_order, function(x) {
  df <- cp_df %>% dplyr::filter(dep_var==paste0(x))
  out_graph <- ggplot(df) + 
    geom_point(aes(x=weight, y=estimate, color = type, shape = type, alpha=type), size = 0.5) +
    labs(color="Comparison") +
    scale_color_manual(values = c("Earlier vs Later Treated" = "dodgerblue2", "Later vs Earlier Treated" = "orange", "Treated vs Untreated" = "darkorchid4")) +
    scale_shape_manual(values = c(19, 15, 17)) + 
    scale_alpha_manual(values = c(0.4, 0.7, 1)) +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          legend.key=element_rect(fil="white"),
          legend.position="bottom") +
    theme(axis.line = element_line(colour = "black")) +
    guides(color = guide_legend(title.position = "top",  title.hjust = .5, override.aes = list(size=3, shape = c(19, 15, 17))),
           shape = "none",
           alpha = "none") +
    theme(legend.box="vertical") +
    theme(text=element_text(size=10,  family="serif")) +
    xlab("Weight") +
    ylab("Estimated effect") +
    theme(legend.margin=margin(0,0,0,0),
          legend.box.margin=margin(0, 0, 0, 0),
          legend.spacing.y = unit(0.05, 'cm'))
  out_graph
})

#Grab the legend from one graph
legend <- get_legend(flood_graphs[[1]] + theme(legend.position = 'bottom',
                                               legend.margin=margin(0,0,0,0),
                                               legend.box.margin=margin(-10,-10,-10,-10),
                                               legend.spacing.y = unit(0.25, 'cm'),
                                               legend.key=element_blank(),
                                               legend.key.size = unit(1, "cm"),
                                               legend.text=element_text(size=10),
                                               legend.text.align = 0,
                                               legend.title = element_text(hjust = 0.5)) + guides(linetype=guide_legend(ncol =1, title.position = "top"))) 

#Put them together
#Make flood row title
flood_title <- ggdraw() + draw_label("Flood to center pivot", fontfamily = "serif", size = 14, hjust=0.5)
flood_row <- plot_grid(
  flood_graphs[[1]] + ggtitle("Acre-feet withdrawn") + theme(text=element_text(size=10,  family="serif")) + theme(legend.position="none"),
  flood_graphs[[2]] + ggtitle("Acres irrigated") + theme(text=element_text(size=10,  family="serif")) + theme(legend.position="none"),
  flood_graphs[[3]] + ggtitle("Depth-applied") + theme(text=element_text(size=10,  family="serif")) + theme(legend.position="none"),
  align = 'vh',
  nrow = 1
)
final_flood_row <- plot_grid(flood_title, flood_row, ncol=1, rel_heights=c(0.1, 1))

#Make cp title
cp_title <- ggdraw() + draw_label("Traditional center pivot to LEPA", fontfamily = "serif", size = 14, hjust=0.5)
cp_row <- plot_grid(
  cp_graphs[[1]] + theme(text=element_text(size=10,  family="serif")) + theme(legend.position="none"),
  cp_graphs[[2]] + theme(text=element_text(size=10,  family="serif")) + theme(legend.position="none"),
  cp_graphs[[3]] + theme(text=element_text(size=10,  family="serif")) + theme(legend.position="none"),
  align = 'vh',
  nrow = 1
)
final_cp_row <- plot_grid(cp_title, cp_row, ncol=1, rel_heights=c(0.1, 1))

#Add them together with the legend
plot_grid(final_flood_row, final_cp_row, 
          legend,
          ncol=1, rel_heights=c(1, 1, 0.4)) # rel_heights values control title margins
ggdraw
ggsave2("figureB4.tiff", device = "tiff",
        path = paste0(dr_output_app),
        width = 6.5,
        height = 8.4,
        dpi = 500,
        units = "in")  